library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
Load Dataset
weather_df =
rnoaa::meteo_pull_monitors(
c("USW00094728", "USC00519397", "USS0023B17S"),
var = c("PRCP", "TMIN", "TMAX"),
date_min = "2017-01-01",
date_max = "2017-12-31") %>%
mutate(
name = recode(
id,
USW00094728 = "CentralPark_NY",
USC00519397 = "Waikiki_HA",
USS0023B17S = "Waterhole_WA"),
tmin = tmin / 10,
tmax = tmax / 10) %>%
select(name, id, everything())
## using cached file: /Users/wuxiaoyu/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2023-09-27 13:57:30.794807 (8.524)
## file min/max dates: 1869-01-01 / 2023-09-30
## using cached file: /Users/wuxiaoyu/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USC00519397.dly
## date created (size, mb): 2023-09-29 11:56:17.087775 (1.707)
## file min/max dates: 1965-01-01 / 2023-09-30
## using cached file: /Users/wuxiaoyu/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2023-09-27 13:57:51.612726 (0.994)
## file min/max dates: 1999-09-01 / 2023-09-30
weather_df
## # A tibble: 1,095 × 6
## name id date prcp tmax tmin
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 CentralPark_NY USW00094728 2017-01-01 0 8.9 4.4
## 2 CentralPark_NY USW00094728 2017-01-02 53 5 2.8
## 3 CentralPark_NY USW00094728 2017-01-03 147 6.1 3.9
## 4 CentralPark_NY USW00094728 2017-01-04 0 11.1 1.1
## 5 CentralPark_NY USW00094728 2017-01-05 0 1.1 -2.7
## 6 CentralPark_NY USW00094728 2017-01-06 13 0.6 -3.8
## 7 CentralPark_NY USW00094728 2017-01-07 81 -3.2 -6.6
## 8 CentralPark_NY USW00094728 2017-01-08 0 -3.8 -8.8
## 9 CentralPark_NY USW00094728 2017-01-09 0 -4.9 -9.9
## 10 CentralPark_NY USW00094728 2017-01-10 0 7.8 -6
## # ℹ 1,085 more rows
Revisit scatterplot
weather_df %>%
ggplot(aes(x = tmin, y = tmax)) +
geom_point(aes(color = name), alpha = .5)
## Warning: Removed 15 rows containing missing values (`geom_point()`).
Add labels
weather_df %>%
ggplot(aes(x = tmin, y = tmax)) +
geom_point(aes(color = name), alpha = .5) +
labs(
title = "Temperature plot",
x = "Minimum daily temperature (C)",
y = "Maxiumum daily temperature (C)",
caption = "Data from the rnoaa package"
)
## Warning: Removed 15 rows containing missing values (`geom_point()`).
Scales
weather_df %>%
ggplot(aes(x = tmin, y = tmax)) +
geom_point(aes(color = name), alpha = .5) +
labs(
title = "Temperature plot",
x = "Minimum daily temperature (C)",
y = "Maxiumum daily temperature (C)",
caption = "Data from the rnoaa package") +
scale_x_continuous(
breaks = c(-15, 0, 15),
labels = c("-15º C", "0", "15"))
## Warning: Removed 15 rows containing missing values (`geom_point()`).
weather_df %>%
ggplot(aes(x = tmin, y = tmax)) +
geom_point(aes(color = name), alpha = .5) +
labs(
title = "Temperature plot",
x = "Minimum daily temperature (C)",
y = "Maxiumum daily temperature (C)",
caption = "Data from the rnoaa package") +
scale_x_continuous(
breaks = c(-15, 0, 15),
labels = c("-15ºC", "0", "15"),
limits = c(-20, 30)) +
# xlim() can be used to control the plot limits in the X axis
scale_y_continuous(
trans = "sqrt",
# scale_y_sqrt() can be added to a ggplot object to transform the Y scale
position = "right")
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 90 rows containing missing values (`geom_point()`).
weather_df %>%
ggplot(aes(x = tmin, y = tmax)) +
geom_point(aes(color = name), alpha = .5) +
labs(
title = "Temperature plot",
x = "Minimum daily temperature (C)",
y = "Maxiumum daily temperature (C)",
caption = "Data from the rnoaa package") +
scale_color_hue(name = "Location", h = c(100, 300))
## Warning: Removed 15 rows containing missing values (`geom_point()`).
# arguments to scale_color_hue() control the color scale and the name in the plot legend
Themes
ggp_temp_plot =
weather_df %>%
ggplot(aes(x = tmin, y = tmax)) +
geom_point(aes(color = name), alpha = .5) +
labs(
title = "Temperature plot",
x = "Minimum daily temperature (C)",
y = "Maxiumum daily temperature (C)",
caption = "Data from the rnoaa package"
) +
viridis::scale_color_viridis(
name = "Location",
discrete = TRUE
)
ggp_temp_plot
## Warning: Removed 15 rows containing missing values (`geom_point()`).
ggp_temp_plot +
theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (`geom_point()`).
# change the position of legend to bottom
theme built in
ggp_temp_plot +
theme_bw() +
theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (`geom_point()`).
theme classic
ggp_temp_plot +
theme_classic() +
theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (`geom_point()`).
learning assessment
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
geom_smooth(se = FALSE) +
geom_point(aes(size = prcp), alpha = .75) +
labs(
title = "Temperature plot",
x = "Date",
y = "Maxiumum daily temperature (C)",
caption = "Data from the rnoaa package"
) +
viridis::scale_color_viridis(discrete = TRUE) +
theme_minimal() +
theme(legend.position = "bottom")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
data argument in
geom_*
central_park =
weather_df %>%
filter(name == "CentralPark_NY")
waikiki =
weather_df %>%
filter(name == "Waikiki_HA")
# split weather_df into separate datasets for Central Park and Waikiki
ggplot(data = waikiki, aes(x = date, y = tmax, color = name)) +
geom_point() +
geom_line(data = central_park)
## Warning: Removed 3 rows containing missing values (`geom_point()`).
# use one in the ggplot() call and another in geom_line()
patchwork
tmax_tmin_p =
weather_df %>%
ggplot(aes(x = tmax, y = tmin, color = name)) +
geom_point(alpha = .5) +
theme(legend.position = "none")
prcp_dens_p =
weather_df %>%
filter(prcp > 0) %>%
ggplot(aes(x = prcp, fill = name)) +
geom_density(alpha = .5) +
theme(legend.position = "none")
tmax_date_p =
weather_df %>%
ggplot(aes(x = date, y = tmax, color = name)) +
geom_point(alpha = .5) +
geom_smooth(se = FALSE) +
theme(legend.position = "bottom")
(tmax_tmin_p + prcp_dens_p) / tmax_date_p
## Warning: Removed 15 rows containing missing values (`geom_point()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
data manipulation
weather_df %>%
mutate(name = forcats::fct_reorder(name, tmax)) %>%
ggplot(aes(x = name, y = tmax)) +
geom_violin(aes(fill = name), color = "blue", alpha = .5) +
theme(legend.position = "bottom")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `name = forcats::fct_reorder(name, tmax)`.
## Caused by warning:
## ! `fct_reorder()` removing 3 missing values.
## ℹ Use `.na_rm = TRUE` to silence this message.
## ℹ Use `.na_rm = FALSE` to preserve NAs.
## Warning: Removed 3 rows containing non-finite values (`stat_ydensity()`).
# reorders name according to tmax values in each name